This is just a work book from the data analysis that I did with a team at the 2nd Annual CUA data science hackathon. This is mostly meant to show some of my methods when dealing with 11 years of voter data and piecing together a story. Feel free to look over it, this file will soon be refined, as this is the unrefined work of a 5 hour coding sprint. I recomend looking at the end for the draft of the crime explorer app. With some more time invested into it could become a great tool.
Historical data from 1992 to 2018 May need to look at history of all of these elections to get context. We have a couple tasks, find some reasoning behind all these datasets, figure out how to join them together. Or atleast we can join them, into a mega database and then nest them together by year or something. Building a solid database will be the only way to win this.
Starting from 08 the data seems to get tidy. 2010 is when they adopt a tidy data mindset with standardized data.
library(tidyverse)
## -- Attaching packages ---------------------------------------------------------- tidyverse 1.2.1 --
## v ggplot2 3.1.1 v purrr 0.3.2
## v tibble 2.1.1 v dplyr 0.8.0.1
## v tidyr 0.8.3 v stringr 1.4.0
## v readr 1.3.1 v forcats 0.4.0
## -- Conflicts ------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(readr)
imports <- function(){
April_26_2011_Special_Election_Certified_Results <- read_csv("DataElection/DataElection/2011/April_26_2011_Special_Election_Certified_Results.csv")
April_3_2012_Primary_Election_Certified_Results <- read_csv("DataElection/DataElection/2012/April_3_2012_Primary_Election_Certified_Results.csv")
May_15_2012_Special_Election_Certified_Results <- read_csv("DataElection/DataElection/2012/May_15_2012_Special_Election_Certified_Results.csv")
November_6_2012_General_and_Special_Election_Certified_Results <- read_csv("DataElection/DataElection/2012/November_6_2012_General_and_Special_Election_Certified_Results.csv")
April_23_2013_Special_Election_Certified_Results <- read_csv("DataElection/DataElection/2013/April_23_2013_Special_Election_Certified_Results.csv")
April_1_2014_Primary_Election_Certified_Results <- read_csv("DataElection/DataElection/2014/April_1_2014_Primary_Election_Certified_Results.csv")
July_15_2014_Special_Election_Certified_Results <- read_csv("DataElection/DataElection/2014/July_15_2014_Special_Election_Certified_Results.csv")
November_4_2014_General_Election_Certified_Results <- read_csv("DataElection/DataElection/2014/November_4_2014_General_Election_Certified_Results.csv")
April_28_2015_Special_Election_Certified_Results <- read_csv("DataElection/DataElection/2015/April_28_2015_Special_Election_Certified_Results.csv")
June_14_2016_Primary_Election_Certified_Results <- read_csv("DataElection/DataElection/2016/June_14_2016_Primary_Election_Certified_Results.csv")
November_8_2016_General_Election_Certified_Results <- read_csv("DataElection/DataElection/2016/November_8_2016_General_Election_Certified_Results.csv")
December_4_2018_Special_Election_Certified_Results <- read_csv("DataElection/DataElection/2018/December_4_2018_Special_Election_Certified_Results.csv")
June_19_2018_Primary_Election_Certified_Results <- read_csv("DataElection/DataElection/2018/June_19_2018_Primary_Election_Certified_Results.csv")
November_6_2018_General_Election_Certified_Results <- read_csv("DataElection/DataElection/2018/November_6_2018_General_Election_Certified_Results.csv")
bigdata <- list(April_26_2011_Special_Election_Certified_Results, April_3_2012_Primary_Election_Certified_Results, May_15_2012_Special_Election_Certified_Results, November_6_2012_General_and_Special_Election_Certified_Results, April_23_2013_Special_Election_Certified_Results, April_1_2014_Primary_Election_Certified_Results, July_15_2014_Special_Election_Certified_Results, November_4_2014_General_Election_Certified_Results, April_28_2015_Special_Election_Certified_Results, June_14_2016_Primary_Election_Certified_Results, November_8_2016_General_Election_Certified_Results) %>% reduce(rbind)
data2018 <- November_6_2018_General_Election_Certified_Results %>%
full_join(June_19_2018_Primary_Election_Certified_Results) %>%
full_join(December_4_2018_Special_Election_Certified_Results)
DF2011to2018 <- data2018 %>%
full_join(bigdata, by = c("ElectionDate" = "ELECTION_DATE", "ElectionName" = "ELECTION_NAME", "ContestNumber" = "CONTEST_ID", "ContestName" = "CONTEST_NAME", "PrecinctNumber" = "PRECINCT_NUMBER", "WardNumber" = "WARD", "Candidate" = "CANDIDATE", "Party" = "PARTY", "Votes" = "VOTES"))
return(DF2011to2018)
}
DF2011to2018 <- imports()
## Parsed with column specification:
## cols(
## ELECTION_DATE = col_character(),
## ELECTION_NAME = col_character(),
## CONTEST_ID = col_double(),
## CONTEST_NAME = col_character(),
## PRECINCT_NUMBER = col_double(),
## WARD = col_double(),
## CANDIDATE = col_character(),
## PARTY = col_character(),
## VOTES = col_double()
## )
## Parsed with column specification:
## cols(
## ELECTION_DATE = col_character(),
## ELECTION_NAME = col_character(),
## CONTEST_ID = col_double(),
## CONTEST_NAME = col_character(),
## PRECINCT_NUMBER = col_double(),
## WARD = col_double(),
## CANDIDATE = col_character(),
## PARTY = col_character(),
## VOTES = col_double()
## )
## Parsed with column specification:
## cols(
## ELECTION_DATE = col_character(),
## ELECTION_NAME = col_character(),
## CONTEST_ID = col_double(),
## CONTEST_NAME = col_character(),
## PRECINCT_NUMBER = col_double(),
## WARD = col_double(),
## CANDIDATE = col_character(),
## PARTY = col_character(),
## VOTES = col_double()
## )
## Parsed with column specification:
## cols(
## ELECTION_DATE = col_character(),
## ELECTION_NAME = col_character(),
## CONTEST_ID = col_double(),
## CONTEST_NAME = col_character(),
## PRECINCT_NUMBER = col_double(),
## WARD = col_double(),
## CANDIDATE = col_character(),
## PARTY = col_character(),
## VOTES = col_double()
## )
## Parsed with column specification:
## cols(
## ELECTION_DATE = col_character(),
## ELECTION_NAME = col_character(),
## CONTEST_ID = col_double(),
## CONTEST_NAME = col_character(),
## PRECINCT_NUMBER = col_double(),
## WARD = col_double(),
## CANDIDATE = col_character(),
## PARTY = col_character(),
## VOTES = col_double()
## )
## Parsed with column specification:
## cols(
## ELECTION_DATE = col_character(),
## ELECTION_NAME = col_character(),
## CONTEST_ID = col_double(),
## CONTEST_NAME = col_character(),
## PRECINCT_NUMBER = col_double(),
## WARD = col_double(),
## CANDIDATE = col_character(),
## PARTY = col_character(),
## VOTES = col_double()
## )
## Parsed with column specification:
## cols(
## ELECTION_DATE = col_character(),
## ELECTION_NAME = col_character(),
## CONTEST_ID = col_double(),
## CONTEST_NAME = col_character(),
## PRECINCT_NUMBER = col_double(),
## WARD = col_double(),
## CANDIDATE = col_character(),
## PARTY = col_character(),
## VOTES = col_double()
## )
## Parsed with column specification:
## cols(
## ELECTION_DATE = col_character(),
## ELECTION_NAME = col_character(),
## CONTEST_ID = col_double(),
## CONTEST_NAME = col_character(),
## PRECINCT_NUMBER = col_double(),
## WARD = col_double(),
## CANDIDATE = col_character(),
## PARTY = col_character(),
## VOTES = col_double()
## )
## Parsed with column specification:
## cols(
## ELECTION_DATE = col_character(),
## ELECTION_NAME = col_character(),
## CONTEST_ID = col_double(),
## CONTEST_NAME = col_character(),
## PRECINCT_NUMBER = col_double(),
## WARD = col_double(),
## CANDIDATE = col_character(),
## PARTY = col_character(),
## VOTES = col_double()
## )
## Parsed with column specification:
## cols(
## ELECTION_DATE = col_character(),
## ELECTION_NAME = col_character(),
## CONTEST_ID = col_double(),
## CONTEST_NAME = col_character(),
## PRECINCT_NUMBER = col_double(),
## WARD = col_double(),
## CANDIDATE = col_character(),
## PARTY = col_character(),
## VOTES = col_double()
## )
## Parsed with column specification:
## cols(
## ELECTION_DATE = col_character(),
## ELECTION_NAME = col_character(),
## CONTEST_ID = col_double(),
## CONTEST_NAME = col_character(),
## PRECINCT_NUMBER = col_double(),
## WARD = col_double(),
## CANDIDATE = col_character(),
## PARTY = col_character(),
## VOTES = col_double()
## )
## Parsed with column specification:
## cols(
## ElectionDate = col_character(),
## ElectionName = col_character(),
## ContestNumber = col_double(),
## ContestName = col_character(),
## PrecinctNumber = col_double(),
## WardNumber = col_double(),
## Candidate = col_character(),
## Party = col_character(),
## Votes = col_double()
## )
## Parsed with column specification:
## cols(
## ElectionDate = col_character(),
## ElectionName = col_character(),
## ContestNumber = col_double(),
## ContestName = col_character(),
## PrecinctNumber = col_double(),
## WardNumber = col_double(),
## Candidate = col_character(),
## Party = col_character(),
## Votes = col_double()
## )
## Parsed with column specification:
## cols(
## ElectionDate = col_character(),
## ElectionName = col_character(),
## ContestNumber = col_double(),
## ContestName = col_character(),
## PrecinctNumber = col_double(),
## WardNumber = col_double(),
## Candidate = col_character(),
## Party = col_character(),
## Votes = col_double()
## )
## Joining, by = c("ElectionDate", "ElectionName", "ContestNumber", "ContestName", "PrecinctNumber", "WardNumber", "Candidate", "Party", "Votes")
## Joining, by = c("ElectionDate", "ElectionName", "ContestNumber", "ContestName", "PrecinctNumber", "WardNumber", "Candidate", "Party", "Votes")
library(DataExplorer)
str(DF2011to2018)
## Classes 'spec_tbl_df', 'tbl_df', 'tbl' and 'data.frame': 105671 obs. of 9 variables:
## $ ElectionDate : chr "11/6/2018 12:00:00 AM" "11/6/2018 12:00:00 AM" "11/6/2018 12:00:00 AM" "11/6/2018 12:00:00 AM" ...
## $ ElectionName : chr "General Election" "General Election" "General Election" "General Election" ...
## $ ContestNumber : num -2 -1 301 301 301 301 301 301 301 301 ...
## $ ContestName : chr "REGISTERED VOTERS - TOTAL" "BALLOTS CAST - TOTAL" "DELEGATE TO THE HOUSE OF REPRESENTATIVES" "DELEGATE TO THE HOUSE OF REPRESENTATIVES" ...
## $ PrecinctNumber: num 1 1 1 1 1 1 1 1 1 1 ...
## $ WardNumber : num 6 6 6 6 6 6 6 6 6 6 ...
## $ Candidate : chr NA NA "Bruce Majors" "Natale \"Lino\" Stracuzzi" ...
## $ Party : chr "CITYWIDE" "CITYWIDE" "LIB" "STG" ...
## $ Votes : num 6736 2813 40 96 2390 ...
plot_intro(DF2011to2018)
plot_missing(DF2011to2018)
plot_bar(DF2011to2018)
## 2 columns ignored with more than 50 categories.
## ContestName: 749 categories
## Candidate: 1688 categories
plot_qq(DF2011to2018)
#we need to clean ElectionName so it works better
#Election name to Factor
#Character to Date
# Party to Factor
#Party to Factor
#Precinct and ward to factor, contest to continuous
COPY <- DF2011to2018
COPY$ElectionName <- str_replace(COPY$ElectionName, "D.C. General Election", "General Election")
COPY$ElectionName <- str_replace(COPY$ElectionName, "D.C. Generation Election", "General Election")
COPY$ElectionName <- str_replace(COPY$ElectionName, "D.C. Special Election", "Special Election")
COPY$ElectionName <- str_replace(COPY$ElectionName, "D.C. Primary Election", "Primary Election")
plot_bar(COPY)
## 2 columns ignored with more than 50 categories.
## ContestName: 749 categories
## Candidate: 1688 categories
library(lubridate)
##
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
##
## date
COPY$ElectionDate <- str_replace(COPY$ElectionDate, "6/19/2018 12:00:00 AM", "6/19/2018")
COPY$ElectionDate <- str_replace(COPY$ElectionDate, "11/6/2018 12:00:00 AM", "11/6/2018")
COPY$ElectionDate <- str_replace(COPY$ElectionDate, "12/4/2018 12:00:00 AM", "12/4/2018")
COPY$ElectionDate <- as.Date(COPY$ElectionDate, format = "%m/%d/%Y")
COPY$ContestName <- factor(COPY$ContestName)
COPY$WardNumber <- factor(COPY$WardNumber)
COPY$PrecinctNumber <- factor(COPY$PrecinctNumber)
str(COPY)
## Classes 'spec_tbl_df', 'tbl_df', 'tbl' and 'data.frame': 105671 obs. of 9 variables:
## $ ElectionDate : Date, format: "2018-11-06" "2018-11-06" ...
## $ ElectionName : chr "General Election" "General Election" "General Election" "General Election" ...
## $ ContestNumber : num -2 -1 301 301 301 301 301 301 301 301 ...
## $ ContestName : Factor w/ 749 levels "ADVISORY NEIGHBORHOOD COMMISSIONER 1A01",..: 725 617 656 656 656 656 656 656 656 656 ...
## $ PrecinctNumber: Factor w/ 143 levels "1","2","3","4",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ WardNumber : Factor w/ 8 levels "1","2","3","4",..: 6 6 6 6 6 6 6 6 6 6 ...
## $ Candidate : chr NA NA "Bruce Majors" "Natale \"Lino\" Stracuzzi" ...
## $ Party : chr "CITYWIDE" "CITYWIDE" "LIB" "STG" ...
## $ Votes : num 6736 2813 40 96 2390 ...
# extract totals into a total data frame
unique(COPY$ElectionDate)
## [1] "2018-11-06" "2018-06-19" "2018-12-04" "2011-04-26" "2012-04-03"
## [6] "2012-05-15" "2012-11-06" "2013-04-23" "2014-04-01" "2014-07-15"
## [11] "2014-11-04" "2015-04-28" "2016-06-14" "2016-11-08"
COPY2 <- COPY
totals <- COPY2 %>%
filter(str_detect(ContestName, "REGISTERED")) %>%
mutate(totals = Votes) %>%
select(-Votes)
COPY2$ContestName <- tolower(COPY2$ContestName)
removal <- str_detect(COPY2$ContestName, "registered")
COPY2 <- COPY2[-removal, ]
copy3 <- COPY2 %>%
full_join(totals[, c("ElectionDate", "PrecinctNumber", "WardNumber", "totals")], by = c("ElectionDate", "PrecinctNumber", "WardNumber"))
library(readxl)
datelist <- read_excel("datelist.xlsx")
## New names:
## * `` -> ...1
str(datelist)
## Classes 'tbl_df', 'tbl' and 'data.frame': 14 obs. of 11 variables:
## $ ...1 : num 1 2 3 4 5 6 7 8 9 10 ...
## $ Date : POSIXct, format: "2018-11-06" "2018-06-19" ...
## $ MaximumTemp : num 60 90 43 84 71 81 45 62 62 88 ...
## $ MinimumTemp : num 51 70 29 68 32 65 28 46 38 71 ...
## $ AvgTemp : num 55.5 80 36 76 51.5 73 36.5 54 50 79.5 ...
## $ Departure : num 5.9 6.3 -4 19 2 10.4 -13.1 -2.1 1.2 2.2 ...
## $ HDD : num 9 0 29 0 13 0 28 11 15 0 ...
## $ CDD : num 0 15 0 11 0 8 0 0 0 15 ...
## $ Precipitation: num 0.92 0.01 0 0 0 0.39 0 0 0 0.84 ...
## $ New Snow : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Snow Depth : num 0 0 0 0 0 0 0 0 0 0 ...
COPY2$ElectionDate <- as.POSIXct(COPY2$ElectionDate)
COPY2 %>%
left_join(datelist, by = c("ElectionDate" = "Date")) %>%
ggplot(aes(x = ElectionDate, y = Votes, fill = AvgTemp)) +
geom_bar(stat = "identity")
totals2 <- totals %>%
group_by(WardNumber, ElectionDate) %>%
summarize(totalsum = sum(totals))
COPY2 %>%
filter(str_detect(ContestName, "total")) %>%
group_by(WardNumber, ElectionDate) %>%
summarize(Votesum = sum(Votes)) %>%
ggplot(aes(x = WardNumber, y = Votesum)) +
geom_bar(stat = "identity", fill = "green") +
geom_bar(data = totals2, aes(x = WardNumber, y = totalsum), fill = "red", alpha = 0.8, stat = "identity", inherit.aes = FALSE)
library(ggthemes)
COPY2 %>%
group_by(WardNumber) %>%
summarize(Votesum = sum(Votes)) %>%
ggplot(aes(x = WardNumber, y = Votesum)) +
geom_bar(stat = "identity") +
theme_stata() +
xlab("Ward") +
ylab("Sum of Votes") +
ggtitle("Votes per Ward", subtitle = "2011 to 2018")
import_crime <- function(){
Crime_Incidents_in_2013 <- read_csv("drive-download-20190330T183559Z-001/Crime_Incidents_in_2013.csv")
Crime_Incidents_in_2014 <- read_csv("drive-download-20190330T183559Z-001/Crime_Incidents_in_2014.csv")
Crime_Incidents_in_2015 <- read_csv("drive-download-20190330T183559Z-001/Crime_Incidents_in_2015.csv")
Crime_Incidents_in_2016 <- read_csv("drive-download-20190330T183559Z-001/Crime_Incidents_in_2016.csv")
Crime_Incidents_in_2017 <- read_csv("drive-download-20190330T183559Z-001/Crime_Incidents_in_2017.csv")
Crime_Incidents_in_2018 <- read_csv("drive-download-20190330T183559Z-001/Crime_Incidents_in_2018.csv")
crime_data <- list(Crime_Incidents_in_2013, Crime_Incidents_in_2014, Crime_Incidents_in_2015, Crime_Incidents_in_2016, Crime_Incidents_in_2017, Crime_Incidents_in_2018) %>% reduce(rbind)
return(crime_data)
}
crime_data <- import_crime()
## Parsed with column specification:
## cols(
## .default = col_character(),
## X = col_double(),
## Y = col_double(),
## REPORT_DAT = col_datetime(format = ""),
## XBLOCK = col_double(),
## YBLOCK = col_double(),
## WARD = col_double(),
## DISTRICT = col_double(),
## PSA = col_double(),
## LATITUDE = col_double(),
## LONGITUDE = col_double(),
## START_DATE = col_datetime(format = ""),
## END_DATE = col_datetime(format = ""),
## OBJECTID = col_double()
## )
## See spec(...) for full column specifications.
## Parsed with column specification:
## cols(
## .default = col_character(),
## X = col_double(),
## Y = col_double(),
## REPORT_DAT = col_datetime(format = ""),
## XBLOCK = col_double(),
## YBLOCK = col_double(),
## WARD = col_double(),
## DISTRICT = col_double(),
## PSA = col_double(),
## LATITUDE = col_double(),
## LONGITUDE = col_double(),
## START_DATE = col_datetime(format = ""),
## END_DATE = col_datetime(format = ""),
## OBJECTID = col_double()
## )
## See spec(...) for full column specifications.
## Parsed with column specification:
## cols(
## .default = col_character(),
## X = col_double(),
## Y = col_double(),
## REPORT_DAT = col_datetime(format = ""),
## XBLOCK = col_double(),
## YBLOCK = col_double(),
## WARD = col_double(),
## DISTRICT = col_double(),
## PSA = col_double(),
## LATITUDE = col_double(),
## LONGITUDE = col_double(),
## START_DATE = col_datetime(format = ""),
## END_DATE = col_datetime(format = ""),
## OBJECTID = col_double()
## )
## See spec(...) for full column specifications.
## Parsed with column specification:
## cols(
## .default = col_character(),
## X = col_double(),
## Y = col_double(),
## REPORT_DAT = col_datetime(format = ""),
## XBLOCK = col_double(),
## YBLOCK = col_double(),
## WARD = col_double(),
## DISTRICT = col_double(),
## PSA = col_double(),
## LATITUDE = col_double(),
## LONGITUDE = col_double(),
## START_DATE = col_datetime(format = ""),
## END_DATE = col_datetime(format = ""),
## OBJECTID = col_double()
## )
## See spec(...) for full column specifications.
## Parsed with column specification:
## cols(
## .default = col_character(),
## X = col_double(),
## Y = col_double(),
## REPORT_DAT = col_datetime(format = ""),
## XBLOCK = col_double(),
## YBLOCK = col_double(),
## WARD = col_double(),
## DISTRICT = col_double(),
## PSA = col_double(),
## LATITUDE = col_double(),
## LONGITUDE = col_double(),
## START_DATE = col_datetime(format = ""),
## END_DATE = col_datetime(format = ""),
## OBJECTID = col_double()
## )
## See spec(...) for full column specifications.
## Parsed with column specification:
## cols(
## .default = col_character(),
## X = col_double(),
## Y = col_double(),
## CCN = col_double(),
## REPORT_DAT = col_datetime(format = ""),
## XBLOCK = col_double(),
## YBLOCK = col_double(),
## WARD = col_double(),
## DISTRICT = col_double(),
## PSA = col_double(),
## LATITUDE = col_double(),
## LONGITUDE = col_double(),
## START_DATE = col_datetime(format = ""),
## END_DATE = col_datetime(format = ""),
## OBJECTID = col_double()
## )
## See spec(...) for full column specifications.
crime_sum <- crime_data %>%
group_by(WARD) %>%
summarize(n = n()) %>%
na.omit()
crime_data %>%
group_by(OFFENSE) %>%
summarise()
crime_data %>%
filter(OFFENSE == "HOMICIDE") %>%
group_by(WARD) %>%
summarize(crimenumber = n()) %>%
ggplot(aes(x = factor(WARD), y = crimenumber)) +
geom_bar(stat = "identity") +
xlab("Ward") +
theme_stata() +
ylab("Frequency of Homicide") +
ggtitle("Homicide frequency per ward", subtitle = "2013 to 2018")
library(ggmap)
## Google's Terms of Service: https://cloud.google.com/maps-platform/terms/.
## Please cite ggmap if you use it! See citation("ggmap") for details.
DCMAP <- get_map("Washington, DC", zoom = 12)
## Source : https://maps.googleapis.com/maps/api/staticmap?center=Washington,%20DC&zoom=12&size=640x640&scale=2&maptype=terrain&language=en-EN&key=xxx
## Source : https://maps.googleapis.com/maps/api/geocode/json?address=Washington,+DC&key=xxx
set.seed(20)
clusters <- kmeans(crime_data[ ,1:2], 8)
crime_data$ESTCLUSTER <- as.factor(clusters$cluster)
ggmap(DCMAP) + geom_point(data = crime_data, aes(x = X, y = Y, col = factor(ESTCLUSTER)), alpha = 0.4) +
ggtitle("DC Wards using Kmeans on crime data")
## Warning: Removed 4582 rows containing missing values (geom_point).
ggmap(DCMAP) + geom_point(data = crime_data, aes(x = X, y = Y, col = factor(WARD)), alpha = 0.4)
## Warning: Removed 4582 rows containing missing values (geom_point).
ggmap(DCMAP) +
geom_bin2d(data = crime_data %>% filter(OFFENSE == 'HOMICIDE'), aes(x = X, y = Y), alpha = 0.8)
## Warning: Removed 50 rows containing non-finite values (stat_bin2d).
library(tidyverse)
library(maptools)
## Loading required package: sp
## Checking rgeos availability: TRUE
library(sp)
library(mapdata)
## Loading required package: maps
##
## Attaching package: 'maps'
## The following object is masked from 'package:purrr':
##
## map
library(rgl)
library(rgdal)
## rgdal: version: 1.4-3, (SVN revision 828)
## Geospatial Data Abstraction Library extensions to R successfully loaded
## Loaded GDAL runtime: GDAL 2.2.3, released 2017/11/20
## Path to GDAL shared files: C:/Users/bingo/Documents/R/win-library/3.5/rgdal/gdal
## GDAL binary built with GEOS: TRUE
## Loaded PROJ.4 runtime: Rel. 4.9.3, 15 August 2016, [PJ_VERSION: 493]
## Path to PROJ.4 shared files: C:/Users/bingo/Documents/R/win-library/3.5/rgdal/proj
## Linking to sp version: 1.3-1
library(ggplot2)
library(rgeos)
## rgeos version: 0.4-2, (SVN revision 581)
## GEOS runtime version: 3.6.1-CAPI-1.10.1
## Linking to sp version: 1.3-1
## Polygon checking: TRUE
##
## Attaching package: 'rgeos'
## The following object is masked from 'package:rgl':
##
## triangulate
precinct <- readOGR("C:/Users/bingo/Desktop/CUA HaXS/Voting_Precinct__2012/Voting_Precinct__2012.shp")
## OGR data source with driver: ESRI Shapefile
## Source: "C:\Users\bingo\Desktop\CUA HaXS\Voting_Precinct__2012\Voting_Precinct__2012.shp", layer: "Voting_Precinct__2012"
## with 143 features
## It has 6 fields
## Integer64 fields read as strings: OBJECTID_1 OBJECTID
ward <- readOGR("C:/Users/bingo/Desktop/CUA HaXS/Ward_from_2012/Ward_from_2012.shp")
## OGR data source with driver: ESRI Shapefile
## Source: "C:\Users\bingo\Desktop\CUA HaXS\Ward_from_2012\Ward_from_2012.shp", layer: "Ward_from_2012"
## with 8 features
## It has 82 fields
## Integer64 fields read as strings: OBJECTID WARD POP_2000 POP_2010 POP_2011_2
precinct2 <- fortify(precinct)
## Regions defined for each Polygons
ward2 <- fortify(ward)
## Regions defined for each Polygons
precinctdat <- as.data.frame(precinct)
map1 <- ggplot() +
geom_polygon(data = precinct2, aes(long, lat, group = group),
colour = "black", fill = "white") +
geom_polygon(data = ward2, aes(long, lat, group = group, fill = group),
colour = "black", alpha = 0.3, size = 1) +
ggtitle("DC Precincts and Wards",
subtitle = "Est. 2012") +
coord_quickmap()+
theme_minimal()
map1
#opens up possibility of DC chloropleth map (interactive too)
library(ggmap)
library(sf)
## Linking to GEOS 3.6.1, GDAL 2.2.3, PROJ 4.9.3
DCMAP <- get_map("Washington, DC", zoom = 12)
## Source : https://maps.googleapis.com/maps/api/staticmap?center=Washington,%20DC&zoom=12&size=640x640&scale=2&maptype=terrain&language=en-EN&key=xxx
## Source : https://maps.googleapis.com/maps/api/geocode/json?address=Washington,+DC&key=xxx
wardsp <- spTransform(ward, CRS("+proj=longlat +datum=WGS84"))
wardsp2 <- fortify(wardsp)
## Regions defined for each Polygons
#Add another column for ward
WARDWITHLABELS <- ward2 %>%
mutate(WARD =
case_when(
group == 0.1 ~ 8,
group == 1.1 ~ 6,
group == 2.1 ~ 7,
group == 3.1 ~ 2,
group == 4.1 ~ 1,
group == 5.1 ~ 5,
group == 6.1 ~ 3,
group == 7.1 ~ 4
))
# Get centroids to plot labels
ward_Cent <- gCentroid(ward, byid = TRUE)
ward_CentPoints <- as.data.frame(ward_Cent)
Wardlabs <- ward_CentPoints %>%
cbind(c(8, 6, 7, 2, 1, 5, 3, 4))
colnames(Wardlabs) <- c("x", "y", "WARD")
# Plot the points on the wards
ggplot() +
geom_polygon(data = WARDWITHLABELS, aes(long, lat, group = group, fill = factor(WARD)),
colour = "black", alpha = 0.3, size = 1) +
geom_text(data = Wardlabs, aes(x, y, label = WARD), size = 5) +
geom_point(data = crime_data %>% filter(OFFENSE == "HOMICIDE"), aes(x = X, y = Y), alpha = 0.4) +
ggtitle("DC Wards",
subtitle = "Est. 2012") +
theme(legend.position = "none") +
coord_quickmap()
#experiment with google maps
ggmap(DCMAP) +
geom_polygon(data = WARDWITHLABELS, aes(long, lat, group = group, fill = factor(WARD)),
colour = "black", alpha = 0.3, size = 1)
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggmap':
##
## wind
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
Sys.setenv('MAPBOX_TOKEN' = 'pk.eyJ1IjoibWFzdGVyYmluZ28xIiwiYSI6ImNqdDluOHo2aDAxenQ0OW51dmdkOGIyaDkifQ.KMv1Wkds1VtmtzOOmMWuiw')
plot_mapbox(mode = 'scattermapbox') %>%
add_polygons(data = WARDWITHLABELS ,
x = ~ long, y = ~ lat, split = ~factor(WARD),
text = ~WARD, hoverinfo = "none", alpha = 0.3) %>%
add_markers(data = crime_data %>% filter(OFFENSE == "HOMICIDE"), x = ~X, y = ~Y, alpha = 1, color = "blue") %>%
layout(mapbox = list(zoom = 10.5,
center = list(lat = ~median(WARDWITHLABELS$lat),
lon = ~median(WARDWITHLABELS$long))
))
## Warning in RColorBrewer::brewer.pal(N, "Set2"): minimal value for n is 3, returning requested palette with 3 different levels
## Warning in RColorBrewer::brewer.pal(N, "Set2"): minimal value for n is 3, returning requested palette with 3 different levels
Attempting to make a small web app out of the crime data with a explorer.
library(crosstalk)
dooot <- SharedData$new(crime_data %>% filter(OFFENSE == "HOMICIDE") %>%
select(X, Y, METHOD, WARD))
bscols(
plot_mapbox() %>%
add_markers(data = dooot, x = ~X, y = ~Y, text = ~WARD, hoverinfo = "text") %>%
highlight(on = "plotly_selected", dynamic = TRUE) %>%
layout(mapbox = list(zoom = 10.5,
center = list(lat = ~median(WARDWITHLABELS$lat),
lon = ~median(WARDWITHLABELS$long)))),
DT::datatable(dooot)
)
## Adding more colors to the selection color palette.
## Setting the `off` event (i.e., 'plotly_deselect') to match the `on` event (i.e., 'plotly_selected'). You can change this default via the `highlight()` function.